﻿#region << 版 本 注 释 >>
/*----------------------------------------------------------------
 * 创建者：王明亮
 * 创建时间：2023/8/4 10:07:17
 * 版本：V1.0.0
 * 描述：
 *
 * ----------------------------------------------------------------
 * 修改人：
 * 时间：
 * 修改说明：
 *
 * 版本：V1.0.1
 *----------------------------------------------------------------*/
#endregion << 版 本 注 释 >>

using CrawlerComm.Handler;
using CrawlerModel.Meitu;
using CrawlerService.Helper;
using HtmlAgilityPack;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;


namespace CrawlerService.Meitu
{
    /// <summary>
    /// Crawls the Meitu home page, extracts every zone, its ranking categories and the
    /// people listed in each category, writes the result as JSON and downloads the images.
    /// </summary>
    public class MeituParseHtmService
    {
        /// <summary>
        /// Number of person nodes consumed per category. The original code started each
        /// category at index <c>8 * j</c>, so each category is taken to own one slice of
        /// this size. NOTE(review): confirm against the page layout.
        /// </summary>
        private const int PersonsPerCategory = 8;

        /// <summary>
        /// Placeholder stored when an expected attribute or node is missing.
        /// </summary>
        private const string MissingValue = "未找到";

        /// <summary>
        /// Directory that holds the JSON data files.
        /// </summary>
        private static readonly string _dataDirectoryPath = Path.Combine(Directory.GetCurrentDirectory(), MeituConfig.JsonDataPath);

        /// <summary>
        /// Path of the crawled JSON data file.
        /// </summary>
        private static readonly string _CrawlerData = Path.Combine(_dataDirectoryPath, "categories.json");

        /// <summary>
        /// Starts the crawl: loads the home page, parses all zones, persists them to
        /// <c>beauties.json</c> and then downloads every image.
        /// </summary>
        /// <returns>
        /// A task that completes once the JSON has been written and all image downloads
        /// have finished. Failures from parsing, writing or downloading propagate to the caller.
        /// </returns>
        public async Task StartAsync()
        {
            // Load the home page HTML document.
            HtmlDocument htmlDocument = await LoadHtmlHelper.LoadHtmlFromUrlAsync(MeituConfig.Url);

            // NOTE(review): this FileInfo is never used afterwards; it is kept only so the
            // static path fields stay referenced. The JSON is actually written to
            // "beauties.json" relative to the working directory - confirm the intended target.
            FileInfo fileInfo = new FileInfo(_CrawlerData);

            // Zone container nodes and the matching zone-name nodes.
            HtmlNodeCollection zoneHtmlNodes = htmlDocument.GetNodes(MeituConfig.ZoneXPath);
            HtmlNodeCollection zoneNameHtmlNodes = htmlDocument.GetNodes(MeituConfig.ZoneNameXPath);

            if (zoneHtmlNodes == null || zoneHtmlNodes.Count == 0)
            {
                return;
            }

            List<BeautyZone> beautyZones = new List<BeautyZone>(zoneHtmlNodes.Count);
            for (int i = 0; i < zoneHtmlNodes.Count; i++)
            {
                beautyZones.Add(ParseZone(htmlDocument, zoneNameHtmlNodes, i));
            }

            // Serialize the crawled data and persist it.
            string beautiesJsonData = JsonConvert.SerializeObject(beautyZones);
            WriteData("beauties.json", beautiesJsonData);

            // Await the downloads so that failures surface through the returned task
            // instead of being lost in a fire-and-forget async void call.
            await DownloadImagesAsync(beautyZones);
        }

        /// <summary>
        /// Builds one zone together with all of its categories.
        /// </summary>
        /// <param name="htmlDocument">The loaded home page document.</param>
        /// <param name="zoneNameHtmlNodes">Zone-name nodes; may be null or shorter than the zone list.</param>
        /// <param name="zoneIndex">Zero-based index of the zone.</param>
        /// <returns>The zone; its category list is empty when nothing could be parsed.</returns>
        private static BeautyZone ParseZone(HtmlDocument htmlDocument, HtmlNodeCollection zoneNameHtmlNodes, int zoneIndex)
        {
            // The name collection comes from a separate query, so it is not guaranteed
            // to line up with the zone collection.
            bool hasName = zoneNameHtmlNodes != null && zoneIndex < zoneNameHtmlNodes.Count;

            BeautyZone beautyZone = new BeautyZone()
            {
                Tittle = hasName ? zoneNameHtmlNodes[zoneIndex].InnerText : string.Empty,
                categoryBeauties = new List<EveryCategoryBeautyTop>()
            };

            // The XPath templates receive index + 1 (presumably a 1-based XPath position).
            int xpathIndex = zoneIndex + 1;

            HtmlNodeCollection topHtmlNodes = htmlDocument.GetNodes(string.Format(MeituConfig.TopXPath, xpathIndex));
            if (topHtmlNodes == null || topHtmlNodes.Count == 0)
            {
                return beautyZone;
            }

            // All categories of this zone.
            HtmlNodeCollection personCategoryHtmlNodes = htmlDocument.GetNodes(string.Format(MeituConfig.PersonCategoryXPath, xpathIndex));
            // All person nodes of this zone, across every category.
            HtmlNodeCollection personHtmlNodes = htmlDocument.GetNodes(string.Format(MeituConfig.PersonXPath, xpathIndex));

            if (personCategoryHtmlNodes == null || personHtmlNodes == null)
            {
                return beautyZone;
            }

            for (int j = 0; j < personCategoryHtmlNodes.Count; j++)
            {
                EveryCategoryBeautyTop categoryTop = new EveryCategoryBeautyTop
                {
                    Category = personCategoryHtmlNodes[j].InnerText,
                    beauties = new List<Beauty>()
                };

                // Take only this category's slice; without the upper bound every category
                // would also collect the people belonging to all later categories.
                int first = PersonsPerCategory * j;
                int end = Math.Min(first + PersonsPerCategory, personHtmlNodes.Count);
                for (int k = first; k < end; k++)
                {
                    categoryTop.beauties.Add(ParseBeauty(personHtmlNodes[k]));
                }

                // Add this category's ranking to the zone.
                beautyZone.categoryBeauties.Add(categoryTop);
            }

            return beautyZone;
        }

        /// <summary>
        /// Extracts rank, name, popularity and image URL from a single person node.
        /// </summary>
        /// <param name="child">The node describing one person.</param>
        /// <returns>The parsed person; missing pieces fall back to placeholders.</returns>
        private static Beauty ParseBeauty(HtmlNode child)
        {
            var rankNode = child.GetSingleNode(child.XPath + "/i");                           // rank
            var imageNode = child.GetSingleNode(child.XPath + "/a[1]/div[1]/img[1]");         // name and image URL
            var popularityNode = child.GetSingleNode(child.XPath + "/a[1]/div[2]/span[2]");   // popularity value

            return new Beauty
            {
                No = rankNode != null ? rankNode.InnerText : string.Empty,
                Name = imageNode != null ? imageNode.GetAttributeValue("alt", MissingValue) : MissingValue,
                Popularity = popularityNode != null ? popularityNode.InnerText : string.Empty,
                ImageUrl = imageNode != null ? imageNode.GetAttributeValue("data-echo", MissingValue) : MissingValue
            };
        }

        /// <summary>
        /// Writes <paramref name="data"/> to <paramref name="fileName"/>, replacing any
        /// previous content.
        /// </summary>
        /// <param name="fileName">Target file path (relative paths resolve against the working directory).</param>
        /// <param name="data">Text to write.</param>
        private void WriteData(string fileName, string data)
        {
            // WriteAllText creates or truncates the file, so a shorter payload can no longer
            // leave stale bytes from an earlier run behind, and the handle is always released.
            File.WriteAllText(fileName, data);
        }

        /// <summary>
        /// Downloads every image into <c>DownloadImg/&lt;zone&gt;/&lt;category&gt;/&lt;name&gt;.jpg</c>
        /// under the working directory, raising a UI update before each download.
        /// </summary>
        /// <param name="beautyZones">The parsed zones whose images should be downloaded.</param>
        /// <returns>A task that completes when all downloads have been awaited.</returns>
        private async Task DownloadImagesAsync(List<BeautyZone> beautyZones)
        {
            int count = 0;
            string downloadRoot = Path.Combine(Directory.GetCurrentDirectory(), "DownloadImg");

            foreach (var beautyZone in beautyZones)
            {
                string rootPath = Path.Combine(downloadRoot, ToSafeFileName(beautyZone.Tittle));
                foreach (var category in beautyZone.categoryBeauties)
                {
                    string downloadPath = Path.Combine(rootPath, ToSafeFileName(category.Category));
                    foreach (var beauty in category.beauties)
                    {
                        count += 1; // running number of downloads
                        string filePath = Path.Combine(downloadPath, ToSafeFileName(beauty.Name) + ".jpg");

                        // CreateDirectory does nothing when the directory already exists.
                        Directory.CreateDirectory(downloadPath);

                        UpdateUIModel updateUIModel = new UpdateUIModel()
                        {
                            DownloadNumber = count,
                            Category = category.Category,
                            beauty = beauty
                        };
                        // Notify the UI.
                        UpdateFrmHandler.OnUpdateUI(updateUIModel);
                        // Download one image at a time.
                        await LoadHtmlHelper.DownloadImg(beauty.ImageUrl, filePath);
                    }
                }
            }
        }

        /// <summary>
        /// Replaces characters that are not allowed in a file name with an underscore,
        /// so scraped text can be used as a path segment.
        /// </summary>
        /// <param name="name">Raw text; may be null.</param>
        /// <returns>The sanitized segment, or an empty string for null or empty input.</returns>
        private static string ToSafeFileName(string name)
        {
            if (string.IsNullOrEmpty(name))
            {
                return string.Empty;
            }

            char[] invalidChars = Path.GetInvalidFileNameChars();
            StringBuilder builder = new StringBuilder(name.Length);
            foreach (char c in name)
            {
                builder.Append(Array.IndexOf(invalidChars, c) >= 0 ? '_' : c);
            }

            return builder.ToString();
        }
    }
}
